Soil Paper Figures
Loading Required Libraries
library(tidyverse)
library(stringr)
library(dplyr)
library(phyloseq)
library(zCompositions)
library(vegan)
library(ggforce)
library(ggpicrust2)
library(ggvegan)
library(microbiome)
library(scales)
library(RColorBrewer)
library(pheatmap)
library(microeco)
library(gridExtra)
library(cowplot)Taxanomic Analysis
Load Data
samdf <- read.delim("~/BecketLab_R/Soil/Soil Paper/soil_metadata_2.txt", sep = "\t", header = T, stringsAsFactors=FALSE)
load("~/BecketLab_R/Soil/Soil Paper/Singlem_df.RData")Create Phyloseq Object
# Convert singlem into phyloseq object
abundance_data <- joined_data[, 1:40]
taxonomy_data <- joined_data[, c("Root", "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")]
rownames(samdf) <- colnames(abundance_data)
s_ps <- phyloseq(
otu_table(as.matrix(abundance_data), taxa_are_rows = TRUE),
tax_table(as.matrix(taxonomy_data)),
sample_data(samdf)
)Create Microtable object (microeco)
tidy_taxa <- tidy_taxonomy(
taxonomy_data
)
soil_microtable <- microtable$new(
abundance_data,
sample_table = samdf,
tax_table = tidy_taxa,
phylo_tree = NULL,
rep_fasta = NULL,
auto_tidy = FALSE
)Generate Colors
n <- 47
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col_vector## [1] "#7FC97F" "#BEAED4" "#FDC086" "#FFFF99" "#386CB0" "#F0027F" "#BF5B17"
## [8] "#666666" "#1B9E77" "#D95F02" "#7570B3" "#E7298A" "#66A61E" "#E6AB02"
## [15] "#A6761D" "#666666" "#A6CEE3" "#1F78B4" "#B2DF8A" "#33A02C" "#FB9A99"
## [22] "#E31A1C" "#FDBF6F" "#FF7F00" "#CAB2D6" "#6A3D9A" "#FFFF99" "#B15928"
## [29] "#FBB4AE" "#B3CDE3" "#CCEBC5" "#DECBE4" "#FED9A6" "#FFFFCC" "#E5D8BD"
## [36] "#FDDAEC" "#F2F2F2" "#B3E2CD" "#FDCDAC" "#CBD5E8" "#F4CAE4" "#E6F5C9"
## [43] "#FFF2AE" "#F1E2CC" "#CCCCCC" "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3"
## [50] "#FF7F00" "#FFFF33" "#A65628" "#F781BF" "#999999" "#66C2A5" "#FC8D62"
## [57] "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494" "#B3B3B3" "#8DD3C7"
## [64] "#FFFFB3" "#BEBADA" "#FB8072" "#80B1D3" "#FDB462" "#B3DE69" "#FCCDE5"
## [71] "#D9D9D9" "#BC80BD" "#CCEBC5" "#FFED6F"
Compositional Bar Chart: Phylum
# Phylum
t1 <- trans_abund$new(dataset = soil_microtable, taxrank = "Phylum", ntaxa = 10)
t1$data_abund$Land.cov <- factor(t1$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# Class
t2 <- trans_abund$new(dataset = soil_microtable, taxrank = "Class", ntaxa = 15)
t2$data_abund$Land.cov <- factor(t2$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# Genus
t3 <- trans_abund$new(dataset = soil_microtable, taxrank = "Family", ntaxa = 25)
t3$data_abund$Land.cov <- factor(t3$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
#################
g1 <- t1$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE, x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))
g1g2 <- t2$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE, x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))
g2g3 <- t3$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE, x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12)) + guides(fill=guide_legend(ncol=1,title = "Family"))
g3plot_grid(g1, g2, g3, ncol = 1, align = 'v', labels = "AUTO")#ggsave("~/BecketLab_R/Soil/Profiling/data/taxa_compositional_bar.svg", width = 15, height = 20, limitsize = FALSE)Species: Alpha Diversity
s_ps@sam_data$Land.cov <- factor(s_ps@sam_data$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
plot_richness(s_ps, x = "Land.cov", measures = c("Shannon", "Simpson"))+
geom_boxplot(fill = "white", color = "black") +
geom_point(aes(color = factor(site_id))) +
geom_jitter(aes(color = factor(site_id))) +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
) #ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_alpha.pdf", width = 6, height = 8, limitsize = FALSE)Beta diversity Preprocessing - Species
otu_z <- cmultRepl(as.matrix(s_ps@otu_table), method = "GBM", output = 'p-counts', z.warning = 0.99)
#create clr function
clr <- function(x) sweep(log(x), 1, rowMeans(log(x)), "-")
#Transpose OTU tables
otu_tx <- data.frame(t(clr(t(otu_z))))
#To matrix
otu_m <- as.matrix(t(otu_tx))
######################### Make and extract NMDS scores
nmds = metaMDS(otu_m, distance = "euclidean")
nmds_scores = as.data.frame(nmds$points)
nmds_scores$pH <- samdf$pH
nmds_scores$Soil_H20 <- samdf$Soil_H2O
nmds_scores$Bulk.D <- samdf$Bulk.D
nmds_scores$NO3 <- samdf$NO3
nmds_scores$NH4 <- samdf$NH4
nmds_scores$LOI <- samdf$LOI
nmds_scores$N <- samdf$N....
nmds_scores$C <- samdf$C....
nmds_scores$site_id <- samdf$site_id
nmds_scores$soil_type <- samdf$Land.cov
#nmds_scores$Samples <- samples.out
colnames(nmds_scores) <- c("NMDS1", "NMDS2", "pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C", "site_id", "soil_type")Plot Beta
nmds_scores$soil_type <- factor(nmds_scores$soil_type, levels = c("C","O","A", "L", "IC", "IA"))
beta_plot <- ggplot(data = nmds_scores) +
ggtitle("Beta-diversity by Soil Type") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_mark_ellipse(aes(x=NMDS1,y=NMDS2,fill= soil_type, color= soil_type), expand = unit(0.5,"mm")) +
geom_point(aes(x=NMDS1, y=NMDS2, shape = soil_type)) +
labs(fill = "Land Cover Type", shape = "Land Cover Type", color = "Land Cover Type")
beta_plot#ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_beta.pdf", width = 8, height = 5, limitsize = FALSE)Beta Stats (Anosim + PERMANOVA)
anosim(otu_m, samdf$Land.cov, distance = "euclidean", permutations = 999)##
## Call:
## anosim(x = otu_m, grouping = samdf$Land.cov, permutations = 999, distance = "euclidean")
## Dissimilarity: euclidean
##
## ANOSIM statistic R: 0.3794
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 999, method = "euclidean")## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 999, method = "euclidean")
## Df SumOfSqs R2 F Pr(>F)
## Model 5 51552 0.27586 2.5905 0.001 ***
## Residual 34 135326 0.72414
## Total 39 186878 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
CCA Preprocessing - species
# CCA requires two dataframes:
# 1st main Data (rows: samples columns: factors[phylum or OTU])
# 2nd Driver of the dataset ex. any continuous counts (rain.Inches)
# subset the data to only preserve main data ONLY
# create a list of colnames bc rmd
cca_driver <- subset(nmds_scores, select = c("pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C"))
data_ps <- data.frame(t(s_ps@otu_table))
cca_ps <- cca(data_ps, cca_driver)
#plot(cca_ps)
fort_cca <- fortify(cca_ps)
# sites and biplot
sites <- which(fort_cca$score == "sites")
biplot <- which(fort_cca$score == "biplot")
fort_cca1 <- fort_cca[sites,]
drivers <- fort_cca[biplot,]
gg_soil_cca <- cbind(fort_cca1, subset(samdf, select = c("Land.cov")))Plot CCA - species
gg_soil_cca$Land.cov <- factor(gg_soil_cca$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
cca_gg <- ggplot(gg_soil_cca) +
ggtitle("CCA1 vs CCA2") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_point(aes(x = CCA1, y = CCA2 , color = factor(Land.cov), shape = Land.cov)) +
geom_segment(data = drivers, aes(x = 0, y = 0, xend = 3*CCA1, yend = 3*CCA2), arrow = arrow(length = unit(0.1, "cm")), linewidth =0.5) +
geom_text(data = drivers, aes(x = 3.3*CCA1, y = 3.3*CCA2, label = label)) +
labs(color = "Land Cover Type", shape = "Land Cover Type")
cca_gg#ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_cca.pdf", width = 8, height = 5, limitsize = FALSE)LEfSe
default_colors <- scales::hue_pal()(6)
t1 <- trans_diff$new(dataset = soil_microtable, method = "lefse", group = "Land.cov")
g1 <- t1$plot_diff_bar(color_values = default_colors,use_number = 1:20)
g1$labels$colour <- "Land Cover Type"
g1$labels$fill <- "Land Cover Type"
g1$labels$group <- "Land Cover Type"
g2 <- t1$plot_diff_abund(color_values = default_colors,use_number = 1:20, add_sig = TRUE)
g2 <- g2 + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.border = element_blank())
g2$labels$colour <- "Land Cover Type"
p <- g1 %>% aplot::insert_right(g2, width=3)
pg1 <- g1 + theme(text=element_text(size=18), axis.text.y = element_text(size = 20))
g2 <- g2 + theme(text=element_text(size=18), axis.text = element_text(size = 12))
p <- g1 %>% aplot::insert_right(g2, width=5)
p#ggsave("~/BecketLab_R/Soil/Profiling/data/taxa_lefse.svg", width = 20, height = 15, limitsize = FALSE)Pearson Correlation
t2 <- trans_diff$new(dataset = soil_microtable, method = "lefse", group = "Land.cov", taxa_level = "all")
# then create trans_env object
t1 <- trans_env$new(dataset = soil_microtable, add_data = soil_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
t1$cal_cor(use_data = "other", p_adjust_method = "fdr", other_taxa = t2$res_diff$Taxa[1:40])
t1$plot_cor(pheatmap = FALSE)#ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_pearsonCor.pdf", width = 10, height = 5, limitsize = FALSE)Testing
#t1$cal_cor(use_data = "other", p_adjust_method = "fdr", other_taxa = t2$res_diff$Taxa[1:10])
#t1$plot_cor(pheatmap = FALSE)Functional Analysis
Load Data
load("~/BecketLab_R/Soil/Functional/2023/kegg_funprofiler_df.RData")Create Phyloseq Object
# Build a phyloseq object
brite_abudance_table <- data.frame(funprofiler_data_wpathways[,2:41])
brite_taxa_df <- as.matrix(funprofiler_data_wpathways[,c(46,45,44)])
row.names(samdf) <- colnames(brite_abudance_table)
brite_ps <- phyloseq(
otu_table(as.matrix(brite_abudance_table), taxa_are_rows = TRUE),
tax_table(as.matrix(brite_taxa_df)),
sample_data(samdf)
)Create Microtable Object
soil_func_microtable <- microtable$new(
brite_abudance_table,
sample_table = samdf,
tax_table = data.frame(brite_taxa_df),
phylo_tree = NULL,
rep_fasta = NULL,
auto_tidy = TRUE
)Composititional Bar Chart
# A Level
t3 <- trans_abund$new(dataset = soil_func_microtable, taxrank = "A", ntaxa = 10)
t3$data_abund$Land.cov <- factor(t3$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# B Level
t4 <- trans_abund$new(dataset = soil_func_microtable, taxrank = "B", ntaxa = 10)
t4$data_abund$Land.cov <- factor(t4$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# C Level
t5 <- trans_abund$new(dataset = soil_func_microtable, taxrank = "kegg_pathway_name", ntaxa = 30)
t5$data_abund$Land.cov <- factor(t5$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
t3 <- t3$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE, x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))
t4 <- t4$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE, x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))
t5 <- t5$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE, x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))
t5plot_grid(t3, t4, t5, ncol = 1, align = 'v', labels = "AUTO")t3ggsave("~/BecketLab_R/Soil/Profiling/data/func_compositional_bar.png", width = 15, height = 5, limitsize = FALSE)Pathway Alpha Diversity
brite_ps@sam_data$Land.cov <- factor(brite_ps@sam_data$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
plot_richness(brite_ps, x = "Land.cov", measures = c("Shannon", "Simpson"))+
geom_boxplot(fill = "white", color = "black") +
geom_point(aes(color = factor(site_id))) +
geom_jitter(aes(color = factor(site_id))) +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
) #ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_alpha.pdf", width = 6, height = 8, limitsize = FALSE)Beta diversity Processing
otu_z<- cmultRepl(as.matrix(brite_ps@otu_table), method = "GBM", output = 'p-counts', z.warning = 0.99)
#create clr function
clr <- function(x) sweep(log(x), 1, rowMeans(log(x)), "-")
#Transpose OTU tables
otu_tx <- data.frame(t(clr(t(otu_z))))
#To matrix
otu_m <- as.matrix(t(otu_tx))
######################### Make and extract NMDS scores
nmds = metaMDS(otu_m, distance = "euclidean")
nmds_scores = as.data.frame(nmds$points)
nmds_scores$pH <- samdf$pH
nmds_scores$Soil_H20 <- samdf$Soil_H2O
nmds_scores$Bulk.D <- samdf$Bulk.D
nmds_scores$NO3 <- samdf$NO3
nmds_scores$NH4 <- samdf$NH4
nmds_scores$LOI <- samdf$LOI
nmds_scores$N <- samdf$N....
nmds_scores$C <- samdf$C....
nmds_scores$site_id <- samdf$site_id
nmds_scores$soil_type <- samdf$Land.cov
colnames(nmds_scores) <- c("NMDS1", "NMDS2", "pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C", "site_id", "soil_type")
nmds_scores$soil_type <- factor(nmds_scores$soil_type, levels = c("C","O","A", "L", "IC", "IA"))Plot Beta
beta_plot_brite <- ggplot(data = nmds_scores) +
ggtitle("Beta-diversity by Soil Type") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_mark_ellipse(aes(x=NMDS1,y=NMDS2,fill= soil_type, color= soil_type), expand = unit(0.5,"mm")) +
geom_point(aes(x=NMDS1, y=NMDS2, shape = soil_type)) +
labs(fill = "Land Cover Type", shape = "Land Cover Type", color = "Land Cover Type")
beta_plot_brite#ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_beta.pdf", width = 8, height = 5, limitsize = FALSE)Beta Stats (Anosim + PERMANOVA)
anosim(otu_m, samdf$Land.cov, distance = "euclidean", permutations = 99)##
## Call:
## anosim(x = otu_m, grouping = samdf$Land.cov, permutations = 99, distance = "euclidean")
## Dissimilarity: euclidean
##
## ANOSIM statistic R: 0.1599
## Significance: 0.01
##
## Permutation: free
## Number of permutations: 99
adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 99, method = "euclidean")## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 99
##
## adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 99, method = "euclidean")
## Df SumOfSqs R2 F Pr(>F)
## Model 5 119532 0.15822 1.2782 0.01 **
## Residual 34 635934 0.84178
## Total 39 755466 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
CCA Preprocessing
cca_driver <- subset(nmds_scores, select = c("pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C"))
data_ps <- data.frame(t(brite_ps@otu_table))
cca_ps <- cca(data_ps, cca_driver)
#plot(cca_ps)
fort_cca <- fortify(cca_ps)
sites <- which(fort_cca$score == "sites")
biplot <- which(fort_cca$score == "biplot")
fort_cca1 <- fort_cca[sites,]
drivers <- fort_cca[biplot,]
gg_soil_cca <- cbind(fort_cca1, subset(samdf, select = c("Land.cov")))
##
gg_soil_cca$Land.cov <- factor(gg_soil_cca$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))Plot CCA - Pathways
cca_gg <- ggplot(gg_soil_cca) +
ggtitle("CCA1 vs CCA2") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_point(aes(x = CCA1, y = CCA2 , color = factor(Land.cov), shape = Land.cov)) +
geom_segment(data = drivers, aes(x = 0, y = 0, xend = 3*CCA1, yend = 3*CCA2), arrow = arrow(length = unit(0.1, "cm")), linewidth =0.5) +
geom_text(data = drivers, aes(x = 3.3*CCA1, y = 3.3*CCA2, label = label)) +
labs(color = "Land Cover Type", shape = "Land Cover Type")
cca_ggggsave("~/BecketLab_R/Soil/Soil Paper/output/func_cca.pdf", width = 8, height = 5, limitsize = FALSE)LEfse
default_colors <- scales::hue_pal()(6)
cust_default_colors <- default_colors[c(1,2,4)]
soil_t1 <- trans_diff$new(dataset = soil_func_microtable, method = "lefse", group = "Land.cov", taxa_level = "kegg_pathway_name")
soil_t1$res_diff$Group <- factor(soil_t1$res_diff$Group, levels = c("C","O","A", "L", "IC", "IA"))
soil_g1 <- soil_t1$plot_diff_bar(color_values = cust_default_colors)
soil_g1$labels$colour <- "Land Cover Type"
soil_g1$labels$fill <- "Land Cover Type"
soil_g1$labels$group <- "Land Cover Type"
soil_g2 <- soil_t1$plot_diff_abund(color_values = default_colors)
soil_g2 <- soil_g2 + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.border = element_blank())
soil_g2$labels$colour <- "Land Cover Type"
soil_g1 <- soil_g1 + theme(axis.text.y = element_text(size = 20))
soil_g2 <- soil_g2
soil_p <- soil_g1 %>% aplot::insert_right(soil_g2, width=3)
soil_psoil_g1soil_g2#ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_lefse.pdf", width = 10, height = 15, limitsize = FALSE)Pearson Correlation
soil_t2 <- trans_diff$new(dataset = soil_func_microtable, method = "lefse", group = "Land.cov", taxa_level = "kegg_pathway_name")
# then create trans_env object
soil_t1 <- trans_env$new(dataset = soil_func_microtable, add_data = soil_func_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
soil_t1$cal_cor(use_data = "other", p_adjust_method = "fdr", other_taxa = soil_t2$res_diff$Taxa[1:40])
soil_t1$plot_cor(pheatmap = FALSE)#ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_pearsonCor.pdf", width = 10, height = 5, limitsize = FALSE)Additional Stats/Figures
Taxa: Alpha Kruskal Wallis Pairwise Comparison Testing
# Step 1: Extract richness data
richness_df <- estimate_richness(s_ps, measures = c("Shannon", "Simpson"))
richness_df$Land.cov <- sample_data(s_ps)$Land.cov
# Step 2: Perform Kruskal-Wallis test
kruskal_result_shan <- kruskal.test(Shannon ~ Land.cov, data = richness_df)
kruskal_result_simp <- kruskal.test(Simpson ~ Land.cov, data = richness_df)
print(kruskal_result_shan)##
## Kruskal-Wallis rank sum test
##
## data: Shannon by Land.cov
## Kruskal-Wallis chi-squared = 17.673, df = 5, p-value = 0.003385
print(kruskal_result_simp)##
## Kruskal-Wallis rank sum test
##
## data: Simpson by Land.cov
## Kruskal-Wallis chi-squared = 15.779, df = 5, p-value = 0.007503
# Step 3: Perform pairwise Wilcoxon test (equivalent to pairwise Kruskal-Wallis)
pairwise_test <- pairwise.wilcox.test(
richness_df$Shannon,
richness_df$Land.cov,
p.adjust.method = "BH" # Benjamini-Hochberg correction for multiple testing
)
pairwise_test2 <- pairwise.wilcox.test(
richness_df$Simpson,
richness_df$Land.cov,
p.adjust.method = "BH" # Benjamini-Hochberg correction for multiple testing
)
print(pairwise_test)##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: richness_df$Shannon and richness_df$Land.cov
##
## C O A L IC
## O 0.832 - - - -
## A 0.083 0.418 - - -
## L 0.878 0.855 0.429 - -
## IC 0.024 0.024 0.083 0.035 -
## IA 0.024 0.024 0.035 0.024 0.429
##
## P value adjustment method: BH
print(pairwise_test2)##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: richness_df$Simpson and richness_df$Land.cov
##
## C O A L IC
## O 0.798 - - - -
## A 0.745 0.321 - - -
## L 0.772 0.552 0.521 - -
## IC 0.071 0.030 0.321 0.136 -
## IA 0.030 0.030 0.048 0.030 0.122
##
## P value adjustment method: BH
Func: Alpha Kruskal Wallis Pairwise Comparison Testing
# Step 1: Extract richness data
richness_df <- estimate_richness(brite_ps, measures = c("Shannon", "Simpson"))
richness_df$Land.cov <- sample_data(brite_ps)$Land.cov
# Step 2: Perform Kruskal-Wallis test
kruskal_result_shan <- kruskal.test(Shannon ~ Land.cov, data = richness_df)
kruskal_result_simp <- kruskal.test(Simpson ~ Land.cov, data = richness_df)
print(kruskal_result_shan)##
## Kruskal-Wallis rank sum test
##
## data: Shannon by Land.cov
## Kruskal-Wallis chi-squared = 8.2619, df = 5, p-value = 0.1424
print(kruskal_result_simp)##
## Kruskal-Wallis rank sum test
##
## data: Simpson by Land.cov
## Kruskal-Wallis chi-squared = 4.5814, df = 5, p-value = 0.4691
# Step 3: Perform pairwise Wilcoxon test (equivalent to pairwise Kruskal-Wallis)
pairwise_test <- pairwise.wilcox.test(
richness_df$Shannon,
richness_df$Land.cov,
p.adjust.method = "BH" # Benjamini-Hochberg correction for multiple testing
)
pairwise_test2 <- pairwise.wilcox.test(
richness_df$Simpson,
richness_df$Land.cov,
p.adjust.method = "BH" # Benjamini-Hochberg correction for multiple testing
)
print(pairwise_test)##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: richness_df$Shannon and richness_df$Land.cov
##
## C O A L IC
## O 0.93 - - - -
## A 0.93 1.00 - - -
## L 0.18 0.18 0.19 - -
## IC 0.93 1.00 0.93 0.18 -
## IA 0.92 1.00 0.93 0.18 1.00
##
## P value adjustment method: BH
print(pairwise_test2)##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: richness_df$Simpson and richness_df$Land.cov
##
## C O A L IC
## O 0.71 - - - -
## A 0.96 0.86 - - -
## L 0.98 0.71 0.71 - -
## IC 0.86 1.00 1.00 0.71 -
## IA 0.71 1.00 0.98 0.71 1.00
##
## P value adjustment method: BH
Taxa: Pearson Correlation grouped by Land Cover Type
t2 <- trans_diff$new(dataset = soil_microtable, method = "lefse", group = "Land.cov", taxa_level = "Phylum")
# then create trans_env object
t1 <- trans_env$new(dataset = soil_microtable, add_data = soil_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
t1$cal_cor(by_group = "Land.cov",use_data = "other", p_adjust_method = "fdr", other_taxa = t2$res_diff$Taxa[1:40])
t1$res_cor$by_group <- factor(t1$res_cor$by_group, levels = c("C","O","A", "L", "IC", "IA"))
t1$plot_cor(pheatmap = FALSE)#ggsave("~/BecketLab_R/Soil/Soil Paper/output/sup_taxa_pearsonCor.pdf", width = 20, height = 10, limitsize = FALSE)Func: Pearson Correlation grouped by Land Cover Type
soil_t2 <- trans_diff$new(dataset = soil_func_microtable, method = "lefse", group = "Land.cov", taxa_level = "kegg_pathway_name")
# then create trans_env object
soil_t1 <- trans_env$new(dataset = soil_func_microtable, add_data = soil_func_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
soil_t1$cal_cor(by_group = "Land.cov",use_data = "other", p_adjust_method = "fdr", other_taxa = soil_t2$res_diff$Taxa[1:40])
soil_t1$res_cor$by_group <- factor(soil_t1$res_cor$by_group, levels = c("C","O","A", "L", "IC", "IA"))
soil_t1$plot_cor(pheatmap = FALSE)#ggsave("~/BecketLab_R/Soil/Soil Paper/output/sup_func_pearsonCor.pdf", width = 20, height = 10, limitsize = FALSE)Feature Importance: Taxanomic Metadata - Land Cover Type
set.seed(145)
soil_t1 <- trans_env$new(dataset = soil_microtable, add_data = soil_microtable$sample_table[,c(2:9)])
soil_tmp <- soil_t1$data_env %>% t %>% as.data.frame
# caret package should be installed
soil_t2 <- trans_classifier$new(dataset = soil_microtable, x.predictors = soil_tmp, y.response = "Land.cov")
soil_t2$cal_preProcess(method = c("center", "scale", "nzv"))
# All samples are used in training if cal_split function is not performed
soil_t2$cal_train(method = "rf")
# generate significance with rfPermute package
# require rfPermute package to be installed
soil_t2$cal_feature_imp(rf_feature_sig = TRUE)
soil_t2$plot_feature_imp(show_sig_group = TRUE, coord_flip = TRUE, width = 0.6, add_sig = TRUE)#ggsave("~/BecketLab_R/Soil/Soil Paper/output/sup_taxa_meta_randomforest.pdf", width = 10, height = 10, limitsize = FALSE)